#########################################################
# Diff-in-Diff and Panel Analysis of NVA Service Effects                      #
#########################################################
### Libraries
library(ggplot2)
library(foreign)
library(data.table)
library(stargazer)
library(lfe)
library(xtable)
library(tidyr)
library(dplyr)
library(broom)
library(patchwork)
library(readxl)

#####################################################################
#####################################################################
## 1. Load Data
#####################################################################
#####################################################################

# Diff-in-Diff Data
load("kaderdaten_panel_full.rda")
data <- master_pkz_year_panel

# create female variable
data$female <- ifelse(data$gender_recode=="female",1,0)
# create male variable
data$male <- ifelse(data$gender_recode=="female",0,1)

# create conscription timing variable
data$post1962 <- ifelse(data$year>1961,1,0)

# create time trends variable
setDT(data)
data[, time_trend := year - min(year, na.rm = TRUE)]
data$time_trend2 <- data$time_trend^2

data$age_cohort_factor <- factor(data$age_1962)
data$year_factor <- factor(data$year)

# generate some more comparison vars
data$economic_elite <- ifelse(data$social_history_recode=="economic_elite",1,0)
data$intelligence <- ifelse(data$social_history_recode=="intelligence",1,0)
data$worker_farmer <- ifelse(data$social_history_recode=="worker_farmer",1,0)

data$no_relatives_abroad <- ifelse(data$relatives_abroad_recode=="no_relatives_abroad",1,0)
data$no_returnee <- ifelse(data$returnee_from_brd_recode=="no_returnee",1,0)
data$father_kpd_sed <- ifelse(data$father_party_before1945_recode=="KPD"|data$father_party_before1945_recode=="SED",1,0)
data$mother_kpd_sed <- ifelse(data$mother_party_before1945_recode=="KPD"|data$mother_party_before1945_recode=="SED",1,0)

#####################################################################
#####################################################################
### Figure 1
#####################################################################
#####################################################################
viz_rank_data <- master_pkz_year_panel %>% 
  filter(age <= 65) %>% 
  filter(working_sample == 1) %>% 
  filter(gender_recode == "male") %>% 
  group_by(pkz) %>% 
  mutate(time_in_position = mean(rank_duration, na.rm = T), 
         has_all_ranks = sum(c(0:4) %in% as.numeric(salheiser_positions_correct)))


viz_rank_hist <- viz_rank_data %>% 
  group_by(age, salheiser_positions_correct) %>% 
  tally() %>% 
  ungroup() %>% 
  complete(age, nesting(salheiser_positions_correct), fill = list(n = 0)) 

viz_rank_mode <- viz_rank_data %>% 
  group_by(age, salheiser_positions_correct) %>% 
  tally() %>% 
  ungroup() %>% 
  complete(age, nesting(salheiser_positions_correct), fill = list(n = 0)) %>% 
  group_by(salheiser_positions_correct) %>% 
  summarise(modal_age = first(age[n == max(n)]))

# 
# rank_info <- tibble::tribble(
#   ~rank,  ~desc,
#   
#   4,  str_wrap("Top-level leadership: Mayor, state secretary, minister, director of combine"),
#   3,  str_wrap("Middle management: Manager of state-owned enterprise, judge,  high-level party secretary"),
#   2,  str_wrap("Head of department: Low-level party secretary, professor, editor-in-chief"),
#   1, str_wrap("Foreman/group leader: University employee, shift leader, deputy head of department"),
#   0,  str_wrap("No leadership: Clerk, worker, case handler, artisan, driver")
#   
# )


custom_colors <- (RColorBrewer::brewer.pal(n = 6, name = "Greys"))
custom_colors <- custom_colors[-1]

viz_rank_plot <- ggplot(viz_rank_hist, aes(x = age, 
                                           y = n, 
                                           fill = factor(salheiser_positions_correct))) + 
  geom_density(stat = "identity", alpha = .6) +
  geom_vline(data = viz_rank_mode,
             aes(xintercept = modal_age,
                 color = factor(salheiser_positions_correct)), 
             size = 1.5, 
             linetype = "dashed") +
  scale_fill_manual(name = "Rank", values = custom_colors)  +
  scale_color_manual(name = "Rank", values = custom_colors) +
  # scale_color_brewer(palette = "Greys") +
  # scale_fill_brewer(palette = "Greys") + 
  labs(x = "Age", y = "Frequency") +
  theme_bw() +
  theme(panel.grid = element_blank(), 
        legend.position = "bottom") 




# For nomenclature --------------------------------------------------------


viz_nc_data <- viz_rank_data %>% 
  group_by(age, nc_dummy) %>% 
  tally()

viz_nc_mode <- viz_nc_data %>% 
  group_by(nc_dummy) %>% 
  summarise(modal_age = first(age[n == max(n)]))

custom_colors <- (RColorBrewer::brewer.pal(n = 9, name = "Greys"))
custom_colors <- custom_colors[-1]
custom_colors <- c(custom_colors[3], custom_colors[length(custom_colors)])

viz_nc_plot <- ggplot(viz_nc_data, aes(x = age, 
                                       y = n, 
                                       fill = factor(nc_dummy))) + 
  geom_density(stat = "identity", alpha = .6) +
  geom_vline(data = viz_nc_mode,
             aes(xintercept = modal_age,
                 color = factor(nc_dummy)),
             linetype = "dashed",
             size = 1.5) +
  scale_fill_manual(name = "Nomenklatura", values = custom_colors)  +
  scale_color_manual(name = "Nomenklatura", values = custom_colors) +
  labs(x = "Age", y = "Frequency") +
  theme_bw() +
  theme(panel.grid = element_blank(), 
        legend.position = "bottom") 



ggsave(plot = viz_rank_plot + viz_nc_plot, filename = "./Figures/NVA/sal_rank_plot.pdf", 
       width = 10, height = 4, scale = 0.9)

ggsave(plot = viz_rank_plot + viz_nc_plot, filename = "./Figures/NVA/sal_rank_plot.tiff", 
       width = 10, height = 4, scale = 0.9, units = "in", dpi = 300)



#####################################################################
#####################################################################
### Figure 2
#####################################################################
#####################################################################


# based on 18--25 vs. 26--34 
# subset data to men
data_diff <- data[data$female==0,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<35,]

# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)
data_diff$Cohort <- as.factor(ifelse(data_diff$cohort_binary==1,"Treated","Control"))
data_diff$Cohort <- factor(data_diff$Cohort,levels(data_diff$Cohort)[c(2,1)])  

p1 <- ggplot(data_diff,aes(x=age,y=salheiser_positions_correct,group=Cohort))+
  geom_smooth(aes(linetype=Cohort, 
                  fill = Cohort) ,
              size=0.5,color="black")+
  theme_bw()+
  theme(panel.grid =element_blank(), 
        legend.position = "bottom") +
  scale_fill_manual(values = c("#252525", "#bdbdbd")) +
  # theme(axis.text=element_text(size=15),axis.title=element_text(size=15))+
  xlab("Age")+ylab("Rank") +
  xlim(c(17,51))+
  annotate("rect", xmin = 18, xmax = 25, ymin = -Inf, ymax = Inf, fill = "grey80", alpha = .25, color = NA)+
  annotate("text", x = 21.5, y = 1.6, label = "Possible Start\n of NVA Service", size = 4)
ggsave(p1, filename = "age_rank_plot_new.pdf", scale = 0.8, width = 6, height = 6)

p2 <- ggplot(data_diff,aes(x=age,y=nc_dummy,group=Cohort))+
  geom_smooth(aes(linetype=Cohort, 
                  fill = Cohort) ,
              size=0.5,color="black")+
  theme_bw()+
  theme(panel.grid =element_blank(), 
        legend.position = "bottom") +
  scale_fill_manual(values = c("#252525", "#bdbdbd")) +
  # theme(axis.text=element_text(size=15),axis.title=element_text(size=15))+
  xlab("Age")+ylab("Pr(Nomenklatura membership)")+
  xlim(c(17,51))+
  annotate("rect", xmin = 18, xmax = 25, ymin = -Inf, ymax = Inf, fill = "grey80", alpha = .25, color = NA)+
  annotate("text", x = 21.5, y = 0.4, label = "Possible Start\n of NVA Service", size = 4)
ggsave(p2, filename = "age_nc_plot_new.pdf", scale = 0.8, width = 6, height = 6)




#####################################################################
#####################################################################
## Table 3
#####################################################################
#####################################################################
# diff-in-diff, only adjacent male cohorts, with individual level FE
fmla1 <- as.formula(paste("salheiser_positions_correct~cohort_binary + post1962 + cohort_binary*post1962| year+sector_cf+pkz|0|age_1962"))
fmla2 <- as.formula(paste("nc_dummy~cohort_binary + post1962 + cohort_binary*post1962| year+sector_cf+pkz|0|age_1962"))

#####################################
# male 18-25 vs. male 26--33
#####################################
# subset data to men
data_diff <- data[data$female==0,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<34,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m1 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m2 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se1 <- m1$cse
se2 <- m2$cse

#####################################
# male <26 vs. male older than 25
#####################################
# subset data to men
data_diff <- data[data$female==0,]

# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m3 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m4 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se3 <- m3$cse
se4 <- m4$cse

#####################################
# male 18--25 vs. female 18--25
#####################################
data_diff <- data
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<26,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$female==0,1,0)

m5 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m6 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se5 <- m5$cse
se6 <- m6$cse


#####################################
# male 23-25 vs.male 26-28
#####################################
# subset data to men
data_diff <- data[data$female==0,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>22,]
data_diff <- data_diff[data_diff$age_1962<29,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m7 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,],exactDOF='rM')
m8 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,],exactDOF='rM')
# Get standard errors
se7 <- m7$cse
se8 <- m8$cse


#####################################
# Make Table
#####################################
# Table in .tex:
stargazer(m1,m2,m3,m4,m5,m6,m7,m8,type="latex",notes = "Standard errors are clustered at the age-cohort level.",
          style="qje", 
          title            = "Analysis of System Engagement",
          covariate.labels = c("Affected Cohort*Post-1961"),
          dep.var.caption  = "",
          dep.var.labels.include = FALSE,
          column.labels   = c("Rank","Nomenclature","Rank","Nomenclature","Rank","Nomenclature","Rank","Nomenclature"),
          se=list(se1,se2,se3,se4,se5,se6,se7,se8),
          add.lines = list(c("Treated","M 18-25","M 18-25","M <26","M <26","M 18-25","M 18-25","M 23-25","M 23-25"),
                           c("Control","M 26-33","M 26-33","M >25","M >25","F 18-25","F 18-25","M 26-28","M 26-28"),
                           c("Year FE","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes"),
                           c("Sector FE","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes"),
                           c("Individual-Level FE","Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes")),
          digits=2,label="tab:diff:2",keep=c(3),
          out="diff_in_diff_main_table3.tex"
)


models <- bind_rows(broom::tidy(m1) %>% mutate(model_name = "m1"),
                    
                    broom::tidy(m2) %>% mutate(model_name = "m2"),
                    
                    broom::tidy(m3) %>% mutate(model_name = "m3"),
                    
                    broom::tidy(m4) %>% mutate(model_name = "m4"),
                    
                    broom::tidy(m5) %>% mutate(model_name = "m5"),
                    
                    broom::tidy(m6) %>% mutate(model_name = "m6"),
                    
                    broom::tidy(m7) %>% mutate(model_name = "m7"),
                    
                    broom::tidy(m8) %>% mutate(model_name = "m8"))

save(models, file="table3.Rdata")





#####################################################################
#####################################################################
### Figure 3
#####################################################################
#####################################################################
# diff-in-diff, only adjacent male cohorts, with individual level FE
fmla1 <- as.formula(paste("party_sed~cohort_binary + post1962 + cohort_binary*post1962| year+sector_cf+pkz|0|age_1962"))
fmla2 <- as.formula(paste("count_core_org~cohort_binary + post1962 + cohort_binary*post1962| year+sector_cf+pkz|0|age_1962"))
fmla3 <- as.formula(paste("org_count~cohort_binary + post1962 + cohort_binary*post1962| year+sector_cf+pkz|0|age_1962"))
fmla4 <- as.formula(paste("number_of_vol_functions~cohort_binary + post1962 + cohort_binary*post1962| year+sector_cf+pkz|0|age_1962"))

#####################################
# male 18-25 vs. male 26--33
#####################################
# subset data to men
data_diff <- data[data$female==0,]
# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<34,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m1 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m2 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])
m3 <- felm(fmla3, data = data_diff[data_diff$working_sample==1,])
m4 <- felm(fmla4, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se1 <- m1$cse
se2 <- m2$cse
se3 <- m3$cse
se4 <- m4$cse


#####################################
# Make Table
#####################################
# Table in .tex:
stargazer(m1,m2,m3,m4,type="latex",notes = "Standard errors are clustered at the age-cohort level.",
          style="qje", 
          title            = "Analysis of System Engagement",
          covariate.labels = c("Affected Cohort*Post-1961"),
          dep.var.caption  = "",
          dep.var.labels.include = FALSE,
          column.labels   = c("SED","Core Organization Membership","Organization Membership","Voluntary Functions"),
          se=list(se1,se2,se3,se4),
          add.lines = list(c("Treated","M 18-25","M 18-25","M 18-25","M 18-25"),
                           c("Control","M 26-33","M 26-33","M 26-33","M 26-33"),
                           c("Year FE","Yes","Yes","Yes","Yes"),
                           c("Sector FE","Yes","Yes","Yes","Yes"),
                           c("Individual-Level FE","Yes","Yes","Yes","Yes")),
          digits=2,label="tab:diff:fig3",keep=c(3),
          out="diff_in_diff_voluntary_fig3.tex"
)

models <- bind_rows(broom::tidy(m1) %>% mutate(model_name = "m1"),
                    
                    broom::tidy(m2) %>% mutate(model_name = "m2"),
                    
                    broom::tidy(m3) %>% mutate(model_name = "m3"),
                    
                    broom::tidy(m4) %>% mutate(model_name = "m4"))

save(models, file="table_fig3.Rdata")

# make the figure from the saved coefficient estimates

mass_org_results <- models %>% 
  filter(!is.na(estimate)) %>% 
  arrange(desc(estimate)) %>% 
  mutate(term = c("Membership in \norganizations \n(count)",
                  "Membership in \ncore mass \n organizations \n(count)", 
                  "SED\nmembership\n(dummy)", 
                  "Voluntary\nFunctions\nin Mass\n Organizations\n(count)")) %>% 
  mutate(term = forcats::fct_reorder(term, desc(estimate)))

mass_org_coefplot <- ggplot(mass_org_results, 
                            aes(x = term, y =estimate)) + 
  geom_point(size = 2) + 
  geom_errorbar(aes(ymin = estimate - 1.96 * std.error, 
                    ymax = estimate + 1.96 * std.error), width = 0) + 
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_bw() +
  labs(#title = "CCDB\n", 
    y = "Difference-in-Difference Coefficient for \nAffected Cohort x Post-1962",
    x = "Additional Outcomes") +
  scale_x_discrete(labels = c("Membership in \norganizations \n(count)",
                              "Membership in \ncore mass \n organizations \n(count)", 
                              "SED\nmembership\n(dummy)", 
                              "Voluntary\nFunctions\nin Mass\n Organizations\n(count)")) +
  theme(panel.grid = element_blank(), 
        plot.title = element_text(hjust = 0.5), 
        plot.subtitle = element_text(size = 8), 
        axis.text.x = element_text(size = 7))

ggsave(mass_org_coefplot, filename = "./Figures/NVA/mass_org_coefplot.pdf", 
       width = 4.5, height = 4, scale = .9)

ggsave(mass_org_coefplot, filename = "./Figures/NVA/mass_org_coefplot.tiff", 
       width = 4.5, height = 4, scale = .9, units = "in", dpi = 300)







#####################################################################
#####################################################################
## Table 4
#####################################################################
#####################################################################
#

# diff-in-diff, only adjacent male cohorts, with individual level FE
fmla1 <- as.formula(paste("party_sed~cohort_binary + post1962 + cohort_binary*post1962+salheiser_positions_correct +salheiser_positions_correct*post1962+salheiser_positions_correct*cohort_binary+salheiser_positions_correct*cohort_binary*post1962| year+sector_cf+pkz|0|age_1962"))
fmla2 <- as.formula(paste("count_core_org~cohort_binary + post1962 + cohort_binary*post1962+salheiser_positions_correct +salheiser_positions_correct*post1962+salheiser_positions_correct*cohort_binary+salheiser_positions_correct*cohort_binary*post1962| year+sector_cf+pkz|0|age_1962"))
fmla3 <- as.formula(paste("org_count~cohort_binary + post1962 + cohort_binary*post1962+salheiser_positions_correct +salheiser_positions_correct*post1962+salheiser_positions_correct*cohort_binary+salheiser_positions_correct*cohort_binary*post1962| year+sector_cf+pkz|0|age_1962"))
fmla4 <- as.formula(paste("number_of_vol_functions~cohort_binary + post1962 + cohort_binary*post1962+salheiser_positions_correct +salheiser_positions_correct*post1962+salheiser_positions_correct*cohort_binary+salheiser_positions_correct*cohort_binary*post1962| year+sector_cf+pkz|0|age_1962"))

#####################################
# male 18-25 vs. male 26--33
#####################################
# subset data to men
data_diff <- data[data$female==0,]
data_diff <- data_diff[data_diff$max_rank==1,]

# throw out age cohorts
data_diff <- data_diff[data_diff$age_1962>17,]
data_diff <- data_diff[data_diff$age_1962<34,]
# create treated cohort indicator
data_diff$cohort_binary <- ifelse(data_diff$age_1962<26,1,0)

m1 <- felm(fmla1, data = data_diff[data_diff$working_sample==1,])
m2 <- felm(fmla2, data = data_diff[data_diff$working_sample==1,])
m3 <- felm(fmla3, data = data_diff[data_diff$working_sample==1,])
m4 <- felm(fmla4, data = data_diff[data_diff$working_sample==1,])

# Get standard errors
se1 <- m1$cse
se2 <- m2$cse
se3 <- m3$cse
se4 <- m4$cse


#####################################
# Make Table
#####################################
# Table in .tex:
stargazer(m1,m2,m3,m4,type="latex",notes = "Standard errors are clustered at the age-cohort level.",
          style="qje", 
          title            = "Analysis of System Engagement",
          covariate.labels = c(),
          dep.var.caption  = "",
          dep.var.labels.include = FALSE,
          column.labels   = c("SED","Org Count Core","Org Count","Vol. Funct."),
          se=list(se1,se2,se3,se4),
          add.lines = list(c("Treated","M 18-25","M 18-25","M 18-25","M 18-25"),
                           c("Control","M 26-33","M 26-33","M 26-33","M 26-33"),
                           c("Year FE","Yes","Yes","Yes","Yes"),
                           c("Sector FE","Yes","Yes","Yes","Yes"),
                           c("Individual-Level FE","Yes","Yes","Yes","Yes")),
          digits=2,label="tab:diff:4",
          out="diff_in_diff_voluntary_preference_table4.tex"
)

models <- bind_rows(broom::tidy(m1) %>% mutate(model_name = "m1"),
                    
                    broom::tidy(m2) %>% mutate(model_name = "m2"),
                    
                    broom::tidy(m3) %>% mutate(model_name = "m3"),
                    
                    broom::tidy(m4) %>% mutate(model_name = "m4"))

save(models, file="table15b.Rdata")




